/*------------------------------------------------------------------------------
*** PURPOSE: 	The following do-file generates the descriptive statistics
				discussed in the paper "Respondent biases in household surveys" 
				by Andrew Dillon and Edouard Romeo Mensah. 

*** NOTE:		The working sample was created in the do-file titled "master.do"
				contained in the folder "02_do" in the replication package.
				Some descriptive statistics are also tabulated in the "master.do"
				do-file.
				
*** OUTLINE:	1. Covariates
						A. Household head's characteristics
						B. Housing and asset characteristics
						
				2. Outcomes
						A. Landholding and non-labor input use characteristics
						B. Crop indicators
						C. Labor characteristics
						D. Labor characteristics conditional on using salaried labor
						E. Production/output characteristics
						F. Productivity characteristics
						G. Commercialization characteristics			
------------------------------------------------------------------------------*/


*							DESCRIPTIVE STATISTICS 						      *


	* 1. COVARIATES

	* A. Household head's characteristics
	*
	* Writes four summary tables (Obs., Min, Mean, Std. dev., Max) of the
	* household head covariates to one CSV file: one table for each value of
	* treatarm (0, 1, 2) and a final table for the pooled sample.
	* Uses the user-written -estpost- and -esttab- commands.

	global headchar "headgender headage headfinfeduc headfeduc hhsize headmarried headmigrant "

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	local mode "replace"
	forvalues s = 0/3 {
		* No sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $headchar `subset'
		esttab using "04_output/Description of household head's characteristics.csv", `mode' label ///
			cells("count min(fmt(2)) mean(fmt(2)) sd(fmt(2)) max(fmt(2))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 1A.`t': Household head's characteristics")
		local mode "append"
	}

	global headchar2 "headmarstat headfeduclvl headfeducdeg headethny headrel"

	* Cross-tabulate each categorical head characteristic against treatarm
	* (frequencies and column percentages) with the user-written -tabout-
	* command. The first variable creates the file; later ones are appended.
	* The path uses a forward slash so that it resolves on every platform.
	local filemode "replace"
	foreach catvar of varlist $headchar2 {
		tabout `catvar' treatarm using "04_output/Description of household head's characteristics -- contd.xls", ///
			noff(2) c(freq col) f(0c 1p 1p) `filemode'
		local filemode "append"
	}
	

	* B. Housing and asset characteristics
	*
	* Writes four summary tables (Obs., Min, Mean, Std. dev., Max) of the
	* housing and asset covariates to one CSV file: one table for each value
	* of treatarm (0, 1, 2) and a final table for the pooled sample.
	* Uses the user-written -estpost- and -esttab- commands.

	global housingasset "hhproptyowner nhouse nrooms asset_index_dn asset_index_d asset_index_n"

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	local mode "replace"
	forvalues s = 0/3 {
		* No sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $housingasset `subset'
		esttab using "04_output/Description of housing and asset characteristics.csv", `mode' label ///
			cells("count min(fmt(2)) mean(fmt(2)) sd(fmt(2)) max(fmt(2))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 1B.`t': Housing characteristics")
		local mode "append"
	}

	global housingchar "housingtype floorhouse roofhouse wallhouse drinkws toilet trashdisp"

	* Cross-tabulate each categorical housing characteristic against treatarm
	* (frequencies and column percentages) with the user-written -tabout-
	* command. The first variable creates the file; later ones are appended.
	* The path uses a forward slash so that it resolves on every platform.
	local filemode "replace"
	foreach catvar of varlist $housingchar {
		tabout `catvar' treatarm using "04_output/Description of housing characteristics.xls", ///
			noff(2) c(freq col) f(0c 1p 1p) `filemode'
		local filemode "append"
	}
	
	* Cross-tabulate every variable matching assetcollind* against treatarm
	* (frequencies and column percentages) with the user-written -tabout-
	* command. The first variable creates the file; later ones are appended.
	* The path uses a forward slash so that it resolves on every platform.
	local filemode "replace"
	foreach assetvar of varlist assetcollind* {
		tabout `assetvar' treatarm using "04_output/Description of asset characteristics.xls", ///
			noff(2) c(freq col) f(0c 1p 1p) `filemode'
		local filemode "append"
	}
	
	
	
	
	* 2. OUTCOMES

	* A. Landholding and non-labor input use characteristics
	*
	* Writes four summary tables (Obs., Min, Mean, Std. dev., Max) of the
	* landholding and input-use outcomes to one CSV file: one table for each
	* value of treatarm (0, 1, 2) and a final table for the pooled sample.
	* Uses the user-written -estpost- and -esttab- commands.

	#d ;
	global depvarsA "hanforest hasforest hapasture haedge hacrops hagarden
			haorchard HHlandsize HHplotsize percentlandcult
			useferti fertival fertivalha irrigation" ;
	#d cr

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	local mode "replace"
	forvalues s = 0/3 {
		* No sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $depvarsA `subset'
		esttab using "04_output/Description of landholding characteristics.csv", `mode' label ///
			cells("count min(fmt(2)) mean(fmt(2)) sd(fmt(2)) max(fmt(2))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 2A.`t': Landholding and non-labor input use characteristics")
		local mode "append"
	}
		
		
	* B. Crop indicators
	*
	* Defines the crop-group variable lists, then writes four summary tables
	* (Obs., Min, Mean, Std. dev., Max) of the variables in $depvarsB to one
	* CSV file: one table for each value of treatarm (0, 1, 2) and a final
	* table for the pooled sample.
	* Uses the user-written -estpost- and -esttab- commands.
	* Only $cereals, $legumes and $vegetables2 enter $depvarsB; the other
	* group lists are defined here but not used in this section.

	#d ;
	global cereals "d_Rice d_Millet d_Sorghum d_Corn d_Fonio d_cereals" ;
	global legumes "d_Bean d_Peanut d_Bambaranut d_Sesame d_Soybean d_legumes" ;
	global vegetables "d_Tomato d_Onion d_Okra d_Lettuce d_Cabbage d_Squash
		d_Pepper d_Cucumber d_Eggplant d_Garlic d_Sorrel d_Carrot d_Spinach
		d_Corchorus d_Bellpepper d_vegetables" ;
	global vegetables2 "d_Tomato d_Onion d_Okra d_Sorrel d_vegetables2" ;
	global cashcrops "d_Cotton d_Sesame d_Corn d_Peanut d_cashcrops" ;
	global roottubers "d_Potato d_Yam d_Cassava d_Sweetpot d_roottubers" ;
	global fruits "d_Watermelon d_Melon d_fruits" ;
	#d cr

	global depvarsB "$cereals $legumes $vegetables2 d_Cotton d_cashcrops ncrops"

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	local mode "replace"
	forvalues s = 0/3 {
		* No sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $depvarsB `subset'
		esttab using "04_output/Description of crop indicators.csv", `mode' label ///
			cells("count min(fmt(0)) mean(fmt(2)) sd(fmt(2)) max(fmt(0))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 2B.`t': Crop indicators")
		local mode "append"
	}
	
	
	* C. Labor characteristics
	*
	* Writes four summary tables (Obs., Min, Mean, Std. dev., Max) of the
	* labor outcomes to one CSV file: one table for each value of treatarm
	* (0, 1, 2) and a final table for the pooled sample.
	* Uses the user-written -estpost- and -esttab- commands.
	* The list contains two variable ranges (a-b), so its contents depend on
	* the order of the variables in the dataset.

	#d ;
	global depvarsC "usepaidlabor pdayYR_unpaidhhm-pdayYR_paidnonhhm
		pdayYR_paidnonhhf pdayYR_paidnonhhc pdayYR_unpaidrel
		pdayYR_unpaidnonrel pdayYR_unpaidhhtot-w_usdYR_labortot" ;
	#d cr

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	local mode "replace"
	forvalues s = 0/3 {
		* No sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $depvarsC `subset'
		esttab using "04_output/Description of labor characteristics.csv", `mode' label ///
			cells("count min(fmt(0)) mean(fmt(2)) sd(fmt(2)) max(fmt(0))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 2C.`t': Labor characteristics")
		local mode "append"
	}
	
	
	* D. Labor characteristics conditional on using salaried labor
	*
	* Same tables as section C, but restricted to households with
	* usepaidlabor==1. Writes four summary tables (Obs., Min, Mean, Std. dev.,
	* Max) to one CSV file: one table for each value of treatarm (0, 1, 2) and
	* a final table for all households in the restricted sample.
	* Uses the user-written -estpost- and -esttab- commands.
	* The list contains two variable ranges (a-b), so its contents depend on
	* the order of the variables in the dataset.

	#d ;
	global depvarsD "pdayYR_unpaidhhm-pdayYR_paidnonhhm pdayYR_paidnonhhf
		pdayYR_paidnonhhc pdayYR_unpaidrel pdayYR_unpaidnonrel
		pdayYR_unpaidhhtot-w_usdYR_labortot" ;
	#d cr

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* Work on a temporary copy of the data limited to users of salaried labor;
	* the full sample is brought back by -restore- below.
	preserve

	keep if usepaidlabor==1

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	* All four titles now use single spacing (two of them had a double space).
	local mode "replace"
	forvalues s = 0/3 {
		* No further sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $depvarsD `subset'
		esttab using "04_output/Description of labor characteristics of HH using salaried labor.csv", `mode' label ///
			cells("count min(fmt(0)) mean(fmt(2)) sd(fmt(2)) max(fmt(0))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 2D.`t': Labor characteristics of HH using salaried labor")
		local mode "append"
	}

	restore

	* Build "conditional" copies of the labor outcomes listed in $depvarsD
	* (defined earlier in section D): each condl_<name> is a clone of <name>
	* that is set to missing unless usepaidlabor==1.
	* -foreach- replaces the out-of-date -for var ...:- construct.
	* NOTE: -clonevar- stops with an error if a condl_* variable already
	* exists, so this section cannot be run twice on the same data in memory.
	foreach labvar of varlist $depvarsD {
		clonevar condl_`labvar' = `labvar'
		replace condl_`labvar' = . if usepaidlabor!=1
	}

	* Give one clone a shorter name (the reason is not documented in this file).
	rename condl_w_pdayYR_unpaidnonhhtot condl_w_pdayYR_unpdnonhhtot

	global depvarsD2 "usepaidlabor condl_*"
	describe $depvarsD2

	* Variable labels for the conditional copies
	label variable condl_pdayYR_unpaidhhm "Unpaid HH male (pdays/yr)"
	label variable condl_pdayYR_unpaidhhf "Unpaid HH female (pdays/yr)"
	label variable condl_pdayYR_unpaidhhc "Unpaid HH child (pdays/yr)"
	label variable condl_pdayYR_paidnonhhm "Paid nonHH male (pdays/yr)"
	label variable condl_pdayYR_paidnonhhf "Paid nonHH female (pdays/yr)"
	label variable condl_pdayYR_paidnonhhc "Paid nonHH child (pdays/yr)"
	label variable condl_pdayYR_unpaidrel "Unpd relatives (pdays/yr)"
	label variable condl_pdayYR_unpaidnonrel "Unpd non-relatives (pdays/yr)"
	label variable condl_pdayYR_unpaidhhtot "Tot unpd HH labor (pdays/yr)"
	label variable condl_adeqYR_unpaidhhtot "Tot unpd HH labor (adeq/yr)"
	label variable condl_pdayYR_paidnonhhtot "Tot paid nonHH labor (pdays/yr)"
	label variable condl_adeqYR_paidnonhhtot "Tot paid nonHH labor (adeq/yr)"
	label variable condl_pdayYR_unpaidnonhhtot "Tot unpd nonHH labor (pdays/yr)"
	label variable condl_pdayYR_childtot "Tot child labor (pdays/yr)"
	label variable condl_pdayYR_labortot "Tot labor (pdays/yr)"
	label variable condl_usdYR_paidnonhhm "Paymt of sal nonHH male ($/yr)"
	label variable condl_usdYR_paidnonhhf "Paymt of sal nonHH female ($/yr)"
	label variable condl_usdYR_paidnonhhc "Paymt of sal nonHH child ($/yr)"
	label variable condl_usdYR_labortot "Tot paymt of sal labor ($/yr)"
	label variable condl_w_pdayYR_unpaidhhm "Unpd HH male (pdays/yr)--w"
	label variable condl_w_pdayYR_unpaidhhf "Unpd HH female (pdays/yr)--w"
	label variable condl_w_pdayYR_unpaidhhc "Unpd HH child (pdays/yr)--w"
	label variable condl_w_pdayYR_paidnonhhm "Paid nonHH male (pdays/yr)--w"
	label variable condl_w_pdayYR_paidnonhhf "Paid nonHH female (pdays/yr)--w"
	label variable condl_w_pdayYR_paidnonhhc "Paid nonHH child (pdays/yr)--w"
	label variable condl_w_pdayYR_unpaidrel "Unpd relatives (pdays/yr)--w"
	label variable condl_w_pdayYR_unpaidnonrel "Unpd non-relatives (pdays/yr)--w"
	label variable condl_w_pdayYR_unpaidhhtot "Tot unpd HH lb (pdays/yr)--w"
	label variable condl_w_adeqYR_unpaidhhtot "Tot unpd HH lb (adeq/yr)--w"
	label variable condl_w_pdayYR_paidnonhhtot "Tot paid nonHH lb (pdays/yr)--w"
	label variable condl_w_adeqYR_paidnonhhtot "Tot paid nonHH lb (adeq/yr)--w"
	label variable condl_w_pdayYR_unpdnonhhtot "Tot unpd nonHH lb (pdays/yr)--w"
	label variable condl_w_pdayYR_childtot "Tot child lb (pdays/yr)--w"
	label variable condl_w_pdayYR_labortot "Tot labor (pdays/yr)--w"
	label variable condl_w_usdYR_paidnonhhm "Paymt sal nonHH male ($/yr)--w"
	label variable condl_w_usdYR_paidnonhhf "Paymt sal nonHH female ($/yr)--w"
	label variable condl_w_usdYR_paidnonhhc "Paymt sal nonHH child ($/yr)--w"
	label variable condl_w_usdYR_labortot "Tot paymt of sal lb ($/yr)--w"


	* E. Production/output characteristics
	*
	* Blanks the output variables for observations with ncrops missing, then
	* writes four summary tables (Obs., Min, Mean, Std. dev., Max) to one CSV
	* file: one table for each value of treatarm (0, 1, 2) and a final table
	* for the pooled sample.
	* Uses the user-written -estpost- and -esttab- commands.

	global depvarsE "output_* log_output_*"

	* Permanent change to the data in memory (there is no preserve/restore).
	* -foreach- replaces the out-of-date -for var ...:- construct.
	foreach outvar of varlist $depvarsE {
		replace `outvar' = . if ncrops==.
	}

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	local mode "replace"
	forvalues s = 0/3 {
		* No sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $depvarsE `subset'
		esttab using "04_output/Description of agricultural output characteristics.csv", `mode' label ///
			cells("count min(fmt(0)) mean(fmt(2)) sd(fmt(2)) max(fmt(0))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 2E.`t': Agricultural output characteristics")
		local mode "append"
	}
		
		
	* F. Productivity characteristics
	*
	* Blanks the productivity variables for observations with ncrops missing,
	* then writes four summary tables (Obs., Min, Mean, Std. dev., Max) to one
	* CSV file: one table for each value of treatarm (0, 1, 2) and a final
	* table for the pooled sample.
	* Uses the user-written -estpost- and -esttab- commands.

	global depvarsF "yield_* pcprod_*"

	* Permanent change to the data in memory (there is no preserve/restore).
	* -foreach- replaces the out-of-date -for var ...:- construct.
	foreach prodvar of varlist $depvarsF {
		replace `prodvar' = . if ncrops==.
	}

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	local mode "replace"
	forvalues s = 0/3 {
		* No sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $depvarsF `subset'
		esttab using "04_output/Description of agricultural productivity characteristics.csv", `mode' label ///
			cells("count min(fmt(0)) mean(fmt(2)) sd(fmt(2)) max(fmt(0))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 2F.`t': Agricultural productivity characteristics")
		local mode "append"
	}
		

	* G. Commercialization characteristics
	*
	* Blanks the commercialization variables for observations with ncrops
	* missing, then writes four summary tables (Obs., Min, Mean, Std. dev.,
	* Max) to one CSV file: one table for each value of treatarm (0, 1, 2) and
	* a final table for the pooled sample.
	* Uses the user-written -estpost- and -esttab- commands.

	global depvarsG "ymkt_* commindex_*"

	* Permanent change to the data in memory (there is no preserve/restore).
	* -foreach- replaces the out-of-date -for var ...:- construct.
	foreach commvar of varlist $depvarsG {
		replace `commvar' = . if ncrops==.
	}

	* Table notes, indexed by the loop counter below
	* (0, 1, 2 = value of treatarm; 3 = all households).
	local note0 "Sample = Control Households"
	local note1 "Sample = T1 Households -- HH Head"
	local note2 "Sample = T2 Households -- Random Proxy"
	local note3 "Sample = All households"

	* The first table creates the file; the remaining ones are appended to it.
	* The path uses a forward slash so that it resolves on every platform.
	local mode "replace"
	forvalues s = 0/3 {
		* No sample restriction for the pooled table (s == 3)
		local subset ""
		if `s' < 3 local subset "if treatarm==`s'"
		* Table suffix runs from 1 to 4
		local t = `s' + 1
		estpost summarize $depvarsG `subset'
		esttab using "04_output/Description of commercialization characteristics.csv", `mode' label ///
			cells("count min(fmt(0)) mean(fmt(2)) sd(fmt(2)) max(fmt(0))") ///
			collabel("Obs." "Min" "Mean" "Std. dev." "Max") addn("`note`s''" "") ///
			title("Table 2G.`t': Commercialization characteristics")
		local mode "append"
	}
		
		
